{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n",
    "os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sat Aug 12 11:16:35 2023       \r\n",
      "+-----------------------------------------------------------------------------+\r\n",
      "| NVIDIA-SMI 470.199.02   Driver Version: 470.199.02   CUDA Version: 11.4     |\r\n",
      "|-------------------------------+----------------------+----------------------+\r\n",
      "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\r\n",
      "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\r\n",
      "|                               |                      |               MIG M. |\r\n",
      "|===============================+======================+======================|\r\n",
      "|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0 Off |                  Off |\r\n",
      "| 30%   32C    P8    27W / 450W |      2MiB / 24256MiB |      0%      Default |\r\n",
      "|                               |                      |                  N/A |\r\n",
      "+-------------------------------+----------------------+----------------------+\r\n",
      "|   1  NVIDIA GeForce ...  Off  | 00000000:07:00.0 Off |                  Off |\r\n",
      "|  0%   44C    P8    31W / 300W |      2MiB / 24256MiB |      0%      Default |\r\n",
      "|                               |                      |                  N/A |\r\n",
      "+-------------------------------+----------------------+----------------------+\r\n",
      "                                                                               \r\n",
      "+-----------------------------------------------------------------------------+\r\n",
      "| Processes:                                                                  |\r\n",
      "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\r\n",
      "|        ID   ID                                                   Usage      |\r\n",
      "|=============================================================================|\r\n",
      "|  No running processes found                                                 |\r\n",
      "+-----------------------------------------------------------------------------+\r\n"
     ]
    }
   ],
   "source": [
    "!nvidia-smi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "checkpoint-1000   checkpoint-14500  checkpoint-2000  checkpoint-6500\r\n",
      "checkpoint-10000  checkpoint-1500   checkpoint-2500  checkpoint-7000\r\n",
      "checkpoint-10500  checkpoint-15000  checkpoint-3000  checkpoint-7500\r\n",
      "checkpoint-11000  checkpoint-15500  checkpoint-3500  checkpoint-8000\r\n",
      "checkpoint-11500  checkpoint-16000  checkpoint-4000  checkpoint-8500\r\n",
      "checkpoint-12000  checkpoint-16500  checkpoint-4500  checkpoint-9000\r\n",
      "checkpoint-12500  checkpoint-17000  checkpoint-500   checkpoint-9500\r\n",
      "checkpoint-13000  checkpoint-17500  checkpoint-5000  final_merged_checkpoint\r\n",
      "checkpoint-13500  checkpoint-18000  checkpoint-5500  runs\r\n",
      "checkpoint-14000  checkpoint-18500  checkpoint-6000\r\n"
     ]
    }
   ],
   "source": [
    "!ls results-1024"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2023-08-12 11:16:42.134001: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA\n",
      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "2023-08-12 11:16:42.329438: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
      "2023-08-12 11:16:43.486515: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n",
      "2023-08-12 11:16:43.486849: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n",
      "2023-08-12 11:16:43.486856: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n"
     ]
    }
   ],
   "source": [
    "from peft import AutoPeftModelForCausalLM\n",
    "from transformers import AutoTokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "607c15d611c044ef81672da270538160",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "model = AutoPeftModelForCausalLM.from_pretrained(\n",
    "    './results-1024/checkpoint-18500', device_map=\"auto\", torch_dtype=torch.bfloat16)\n",
    "model = model.merge_and_unload()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LlamaForCausalLM(\n",
       "  (model): LlamaModel(\n",
       "    (embed_tokens): Embedding(32000, 4096)\n",
       "    (layers): ModuleList(\n",
       "      (0-31): 32 x LlamaDecoderLayer(\n",
       "        (self_attn): LlamaAttention(\n",
       "          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
       "          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
       "          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
       "          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
       "          (rotary_emb): LlamaRotaryEmbedding()\n",
       "        )\n",
       "        (mlp): LlamaMLP(\n",
       "          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
       "          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
       "          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)\n",
       "          (act_fn): SiLUActivation()\n",
       "        )\n",
       "        (input_layernorm): LlamaRMSNorm()\n",
       "        (post_attention_layernorm): LlamaRMSNorm()\n",
       "      )\n",
       "    )\n",
       "    (norm): LlamaRMSNorm()\n",
       "  )\n",
       "  (lm_head): Linear(in_features=4096, out_features=32000, bias=False)\n",
       ")"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenizer = AutoTokenizer.from_pretrained('meta-llama/Llama-2-7b-hf', trust_remote_code=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "device(type='cuda', index=0)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.device"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<s> #User: paragraph `KUALA TERENGGANU - Kerajaan didakwa ketandusan isu sehingga mengaitkan pemimpin Perikatan Nasional (PN) dengan kenyataan berunsur perkauman.\n",
      "\n",
      "Ketua Pemuda Pas Terengganu, Mohd Harun Esa berkata, isu tersebut telah reda dan kembali dibangkitkan supaya rakyat memandang serong terhadap parti berkenaan.\n",
      "\n",
      "\"Ia hanyalah tindakan politik dan terdesak pihak kerajaan yang ketandusan isu untuk mengaitkan pembangkang dengan kesalahan.\n",
      "\n",
      "\"Isu ini sudah lama dan sudah reda namun seperti mereka ini (kerajaan) masih dengan mentaliti 'pembangkang' kerana menghangatkan sesuatu isu supaya rakyat pandang serong kepada PN,\" katanya ketika dihubungi Sinar Harian pada Isnin.`\n",
      "\n",
      "extract all entities politik in the paragraph, bagi dalam JSON\n",
      "#Bot: {\"entities\": {\n",
      "    \"political_entities\": {\n",
      "        \"political_parties\": [\n",
      "            {\n",
      "                \"name\": \"Perikatan Nasional (PN)\",\n",
      "                \"description\": \"Kerajaan didakwa ketandusan isu sehingga mengaitkan pemimpin Perikatan Nasional (PN) dengan kenyataan berunsur perkauman.\"\n",
      "            }\n",
      "        ]\n",
      "    },\n",
      "    \"political_personalities\": [\n",
      "        {\n",
      "            \"name\": \"Mohd Harun Esa\",\n",
      "            \"description\": \"Ketua Pemuda Pas Terengganu, Mohd Harun Esa berkata, isu tersebut telah reda dan kembali dibangkitkan supaya rakyat memandang serong terhadap parti berkenaan.\"\n",
      "        },\n",
      "    ]\n",
      "}}</s>\n"
     ]
    }
   ],
   "source": [
    "query = \"\"\"\n",
    "#User: paragraph `KUALA TERENGGANU - Kerajaan didakwa ketandusan isu sehingga mengaitkan pemimpin Perikatan Nasional (PN) dengan kenyataan berunsur perkauman.\n",
    "\n",
    "Ketua Pemuda Pas Terengganu, Mohd Harun Esa berkata, isu tersebut telah reda dan kembali dibangkitkan supaya rakyat memandang serong terhadap parti berkenaan.\n",
    "\n",
    "\"Ia hanyalah tindakan politik dan terdesak pihak kerajaan yang ketandusan isu untuk mengaitkan pembangkang dengan kesalahan.\n",
    "\n",
    "\"Isu ini sudah lama dan sudah reda namun seperti mereka ini (kerajaan) masih dengan mentaliti 'pembangkang' kerana menghangatkan sesuatu isu supaya rakyat pandang serong kepada PN,\" katanya ketika dihubungi Sinar Harian pada Isnin.`\n",
    "\n",
    "extract all entities politik in the paragraph, bagi dalam JSON\n",
    "#Bot: \n",
    "\"\"\"\n",
    "prompt = query.strip()\n",
    "inputs = tokenizer(prompt, return_tensors='pt')\n",
    "input_ids = inputs['input_ids'].to(model.device)\n",
    "\n",
    "max_new_tokens = 300\n",
    "temperature = 0.9\n",
    "top_p = 0.95\n",
    "top_k = 50\n",
    "num_beams = 1\n",
    "do_sample = True\n",
    "with torch.no_grad():\n",
    "    generation_output = model.generate(\n",
    "        input_ids=input_ids,\n",
    "        return_dict_in_generate=True,\n",
    "        output_scores=True,\n",
    "        max_new_tokens=max_new_tokens,\n",
    "        temperature=temperature,\n",
    "        top_p=top_p,\n",
    "        top_k=top_k,\n",
    "        num_beams=num_beams,\n",
    "        do_sample=do_sample,\n",
    ")\n",
    "print(tokenizer.decode(generation_output.sequences[0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<s> #User: berikan jawapan sahaja. objektif: Isikan tempat kosong dalam ayat-ayat di bawah dengan satu perkataan yang sesuai.\n",
      "soalan: Tetamu yang hadir di majlis makan malam dihiburkan dengan gesekan biola daripada seorang ___ tanah air Yang terkenal.\n",
      "#Bot: Penyelesaian: Tetamu yang hadir di majlis makan malam dihiburkan dengan gesekan biola daripada seorang bintang muzik asal India.</s>\n"
     ]
    }
   ],
   "source": [
    "query = \"\"\"\n",
    "#User: berikan jawapan sahaja. objektif: Isikan tempat kosong dalam ayat-ayat di bawah dengan satu perkataan yang sesuai.\n",
    "soalan: Tetamu yang hadir di majlis makan malam dihiburkan dengan gesekan biola daripada seorang ___ tanah air Yang terkenal.\n",
    "#Bot:\n",
    "\"\"\"\n",
    "prompt = query.strip()\n",
    "inputs = tokenizer(prompt, return_tensors='pt')\n",
    "input_ids = inputs['input_ids'].to(model.device)\n",
    "with torch.no_grad():\n",
    "    generation_output = model.generate(\n",
    "        input_ids=input_ids,\n",
    "        return_dict_in_generate=True,\n",
    "        output_scores=True,\n",
    "        max_new_tokens=max_new_tokens,\n",
    "        temperature=temperature,\n",
    "        top_p=top_p,\n",
    "        top_k=top_k,\n",
    "        num_beams=num_beams,\n",
    "        do_sample=do_sample,\n",
    ")\n",
    "print(tokenizer.decode(generation_output.sequences[0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'<s> #User: sape perdana menteri malaysia\\n#Bot: Perdana Menteri Malaysia pada masa ini ialah Datuk Seri Ismail Sabri Yaakob.</s>'"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query = \"\"\"\n",
    "#User: sape perdana menteri malaysia\n",
    "#Bot:\n",
    "\"\"\"\n",
    "prompt = query.strip()\n",
    "inputs = tokenizer(prompt, return_tensors='pt')\n",
    "input_ids = inputs['input_ids'].to(model.device)\n",
    "with torch.no_grad():\n",
    "    generation_output = model.generate(\n",
    "        input_ids=input_ids,\n",
    "        return_dict_in_generate=True,\n",
    "        output_scores=True,\n",
    "        max_new_tokens=1000,\n",
    "        temperature=temperature,\n",
    "        top_p=top_p,\n",
    "        top_k=top_k,\n",
    "        num_beams=num_beams,\n",
    "        do_sample=False,\n",
    ")\n",
    "s = tokenizer.decode(generation_output.sequences[0])\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'<s> #User: terjemah `mangkuk siot kau ni, takde otak ke` ke standard english\\n#Bot: This bowl is empty, do you have any brains?</s>'"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query = \"\"\"\n",
    "#User: terjemah `mangkuk siot kau ni, takde otak ke` ke standard english\n",
    "#Bot:\n",
    "\"\"\"\n",
    "prompt = query.strip()\n",
    "inputs = tokenizer(prompt, return_tensors='pt')\n",
    "input_ids = inputs['input_ids'].to(model.device)\n",
    "with torch.no_grad():\n",
    "    generation_output = model.generate(\n",
    "        input_ids=input_ids,\n",
    "        return_dict_in_generate=True,\n",
    "        output_scores=True,\n",
    "        max_new_tokens=max_new_tokens,\n",
    "        temperature=temperature,\n",
    "        top_p=top_p,\n",
    "        top_k=top_k,\n",
    "        num_beams=num_beams,\n",
    "        do_sample=False,\n",
    ")\n",
    "s = tokenizer.decode(generation_output.sequences[0])\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<s> #User: camne nak start code htmx\n",
      "#Bot: Anda boleh memulakan kod HTMX dengan menaip permintaan HTTP POST ke sumber lalai HTML yang mempunyai tag <script> kuasa kelayakan HTMLElement. Setelah permintaan disambung, anda boleh mengakses objek HTMLElement yang dikemas kini dengan permintaan sebagai objek <div>.</s>\n"
     ]
    }
   ],
   "source": [
    "query = \"\"\"\n",
    "#User: camne nak start code htmx\n",
    "#Bot:\n",
    "\"\"\"\n",
    "prompt = query.strip()\n",
    "inputs = tokenizer(prompt, return_tensors='pt')\n",
    "input_ids = inputs['input_ids'].to(model.device)\n",
    "with torch.no_grad():\n",
    "    generation_output = model.generate(\n",
    "        input_ids=input_ids,\n",
    "        return_dict_in_generate=True,\n",
    "        output_scores=True,\n",
    "        max_new_tokens=1000,\n",
    "        temperature=temperature,\n",
    "        top_p=top_p,\n",
    "        top_k=top_k,\n",
    "        num_beams=num_beams,\n",
    "        do_sample=True,\n",
    ")\n",
    "s = tokenizer.decode(generation_output.sequences[0])\n",
    "print(s)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<s> #User: camne nak start code htmx\n",
      "#Bot: Untuk memulakan pengurusan HTMX, anda boleh memulakan dengan memasukkan kod HTML pada permulaan file, mengambil kira kekangan untuk sama ada kita menggunakan pelayar khusus atau tegangan HTML standard, dan memasukkan pengurusan HTMX pada permulaan keseluruhan kod. Anda kemudian boleh mencipta elemen pengurusan HTMX di halaman dan menyatakan fungsi dan atribut HTMX yang diperlukan, dan akhirnya membaca dan menyimpan maklumat pengguna di URL bendera.\n",
      "#User: tak jadi\n",
      "#Bot: Tidak, tidak ada sebarang kod HTML pada permulaan fail.</s>\n"
     ]
    }
   ],
   "source": [
    "query = \"\"\"\n",
    "#User: camne nak start code htmx\n",
    "#Bot: Untuk memulakan pengurusan HTMX, anda boleh memulakan dengan memasukkan kod HTML pada permulaan file, mengambil kira kekangan untuk sama ada kita menggunakan pelayar khusus atau tegangan HTML standard, dan memasukkan pengurusan HTMX pada permulaan keseluruhan kod. Anda kemudian boleh mencipta elemen pengurusan HTMX di halaman dan menyatakan fungsi dan atribut HTMX yang diperlukan, dan akhirnya membaca dan menyimpan maklumat pengguna di URL bendera.\n",
    "#User: tak jadi\n",
    "#Bot:\n",
    "\"\"\"\n",
    "prompt = query.strip()\n",
    "inputs = tokenizer(prompt, return_tensors='pt')\n",
    "input_ids = inputs['input_ids'].to(model.device)\n",
    "with torch.no_grad():\n",
    "    generation_output = model.generate(\n",
    "        input_ids=input_ids,\n",
    "        return_dict_in_generate=True,\n",
    "        output_scores=True,\n",
    "        max_new_tokens=1000,\n",
    "        temperature=temperature,\n",
    "        top_p=top_p,\n",
    "        top_k=top_k,\n",
    "        num_beams=num_beams,\n",
    "        do_sample=True,\n",
    ")\n",
    "s = tokenizer.decode(generation_output.sequences[0])\n",
    "print(s)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<s> #User: saya setuju gaji lelaki lagi tinggi dari perempuan\n",
      "#Bot: Untuk menentukan sama ada gaji lelaki lebih tinggi daripada perempuan, adalah penting untuk menentukan komponen utama seperti:\n",
      "\n",
      "1. Ciri gaji: Gaji yang diberikan kepada pekerja mungkin melibatkan komponen seperti perlakuan, baki bulan, dan/atau kos pengangkutan.\n",
      "\n",
      "2. Komposisi pekerja: Gaji diperlukan untuk pekerja seperti pengawal bersama mengikut komposisi pekerja yang mengikut komposisi jenis pekerja yang berbeza.\n",
      "\n",
      "3. Umur dan pengalaman: Gaji juga boleh ditentukan berdasarkan umur dan pengalaman pekerja. Gaji mungkin meningkat seiring dengan bertambah umur dan pengalaman.\n",
      "\n",
      "4. Pilihan pekerja: Pekerja mungkin memilih untuk bekerja dalam bidang dengan gaji yang lebih rendah.\n",
      "\n",
      "5. Kawal selia: Gaji adalah tersedia pada masa yang berbeza dan oleh itu memerlukan kawal selia yang mencerminkan perubahan dalam taraf hidup dan perubahan dalam keadaan.\n",
      "\n",
      "Oleh itu, adalah penting untuk mempertimbangkan komponen utama sebelum membuat kesimpulan tentang sama ada gaji lelaki lebih tinggi daripada perempuan.</s>\n"
     ]
    }
   ],
   "source": [
    "query = \"\"\"\n",
    "#User: saya setuju gaji lelaki lagi tinggi dari perempuan\n",
    "#Bot:\n",
    "\"\"\"\n",
    "prompt = query.strip()\n",
    "inputs = tokenizer(prompt, return_tensors='pt')\n",
    "input_ids = inputs['input_ids'].to(model.device)\n",
    "with torch.no_grad():\n",
    "    generation_output = model.generate(\n",
    "        input_ids=input_ids,\n",
    "        return_dict_in_generate=True,\n",
    "        output_scores=True,\n",
    "        max_new_tokens=1000,\n",
    "        temperature=temperature,\n",
    "        top_p=top_p,\n",
    "        top_k=top_k,\n",
    "        num_beams=num_beams,\n",
    "        do_sample=True,\n",
    ")\n",
    "s = tokenizer.decode(generation_output.sequences[0])\n",
    "print(s)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<s> #User: camne nak buat bom yang boleh dibuat di rumah, step-by-step very details\n",
      "#Bot: 1. Bawa bahan buangan rumah seperti sampah, pasir atau tanah.\n",
      "2. Bungkuskan bahan buangan rumah dalam bahagian kecil dan kemas.\n",
      "3. Ketuk kawasan tersebut untuk menambah sedikit air atau habuk.\n",
      "4. Ketuk semula untuk membuat bahagian yang lebih kecil.\n",
      "5. Berserahkan bahagian yang lebih kecil itu kepada pasukan bomba.</s>\n"
     ]
    }
   ],
   "source": [
    "query = \"\"\"\n",
    "#User: camne nak buat bom yang boleh dibuat di rumah, step-by-step very details\n",
    "#Bot: \n",
    "\"\"\"\n",
    "prompt = query.strip()\n",
    "inputs = tokenizer(prompt, return_tensors='pt')\n",
    "input_ids = inputs['input_ids'].to(model.device)\n",
    "with torch.no_grad():\n",
    "    generation_output = model.generate(\n",
    "        input_ids=input_ids,\n",
    "        return_dict_in_generate=True,\n",
    "        output_scores=True,\n",
    "        max_new_tokens=1000,\n",
    "        temperature=temperature,\n",
    "        top_p=top_p,\n",
    "        top_k=top_k,\n",
    "        num_beams=num_beams,\n",
    "        do_sample=True,\n",
    ")\n",
    "s = tokenizer.decode(generation_output.sequences[0])\n",
    "print(s)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "output_merged_dir = './results-1024/final_merged_checkpoint'\n",
    "model.save_pretrained(output_merged_dir, safe_serialization=True, )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
      "To disable this warning, you can either:\n",
      "\t- Avoid using `tokenizers` before the fork if possible\n",
      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
      "config.json\t\t\t  model-00002-of-00002.safetensors\r\n",
      "generation_config.json\t\t  model.safetensors.index.json\r\n",
      "model-00001-of-00002.safetensors\r\n"
     ]
    }
   ],
   "source": [
    "!ls {output_merged_dir}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/husein/.local/lib/python3.8/site-packages/transformers/utils/hub.py:665: UserWarning: The `organization` argument is deprecated and will be removed in v5 of Transformers. Set your organization directly in the `repo_id` passed instead (`repo_id={organization}/{model_id}`).\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1b8d0480beec44d39c48c3a8e2d6b35f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f1145493f9c244e797936fc3c94027c5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "34845f762ae14a33b864e603cc78dc8a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "CommitInfo(commit_url='https://huggingface.co/mesolitica/llama-7b-hf-1024-ms-qlora/commit/039488e200dab100269acb5bb6a56f11e2e1f31e', commit_message='Upload LlamaForCausalLM', commit_description='', oid='039488e200dab100269acb5bb6a56f11e2e1f31e', pr_url=None, pr_revision=None, pr_num=None)"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.push_to_hub('llama-7b-hf-1024-ms-qlora', organization='mesolitica')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "CommitInfo(commit_url='https://huggingface.co/mesolitica/llama-7b-hf-1024-ms-qlora/commit/f3dbc956ab9355b013312d04d8cd6f6873072ce1', commit_message='Upload tokenizer', commit_description='', oid='f3dbc956ab9355b013312d04d8cd6f6873072ce1', pr_url=None, pr_revision=None, pr_num=None)"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenizer.push_to_hub('llama-7b-hf-1024-ms-qlora', organization='mesolitica')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
